1 Clusters

tar_load(c(clust_curv_site, clust_curv_site_fac_50))
tar_load(c(clust_curv_site_red_fac_1, clust_curv_site_red_fac_50))
tar_load(c(clust_curv_site_tot_fac_1, clust_curv_site_tot_fac_50))
tar_load(c(site_cl_rm, site_cl_rm_red, site_cl_rm_tot))

tar_load(c(
    gaussian_inla_no_drivers_adj_re,
    gaussian_inla_exo_no_drivers_adj_re)
)
tot <- rbind(
     gaussian_inla_no_drivers_adj_re,
     gaussian_inla_exo_no_drivers_adj_re
     )
  • Distribution of all biodiversity facets:
tot %>%
  ggplot(aes(x = mean)) +
  geom_histogram() +
  facet_wrap(vars(response), nrow = 2, scales = "free")
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

  • Even with scaling, we see that the variability of the temporal trends of percentage of exotic species and abundance are very low (Fig. ??). It is the reason why we exclude those variables from the clustering method (but see below), i.e. they cannot discriminate cluster because they vary very little.
tot %>%
  group_by(response) %>%
  mutate(mean = mean / sd(mean)) %>%
  ggplot(aes(x = mean)) +
  geom_histogram() +
  facet_wrap(vars(response), nrow = 2, scales = "free")
#> `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

2 PCA

  • With only variables presented in the first figure:
pca_clust_red <- compute_rotated_pca(
  site_no_drivers_inla_tot[,
    clust_var_alter
    #clust_var_alter[clust_var_alter != "perc_exo_sp"]
    ],
  naxis = 5
)

axis_l <- list(
  c("RC1", "RC2"),
  c("RC1", "RC3"),
  c("RC1", "RC4"),
  c("RC1", "RC5"),
  c("RC2", "RC3"),
  c("RC2", "RC4"),
  c("RC2", "RC5"),
  c("RC3", "RC4"),
  c("RC3", "RC5"),
  c("RC4", "RC5")
)
p_pca_red <- map(axis_l,
  ~ plot_pca_clust(
     .data = pca_clust_red$rotated,
     site_cl = site_cl_rm_red,
     add_point = FALSE,
     add_ellipse = FALSE,
     xaxis = .x[1], yaxis = .x[2],
     ctb_thld = .2,
     replace_var = get_var_replacement(),
     size_arrows_segment = 1,
     label_size = 2.5,
     alpha_point = .2,
     lim_x_y = c(-1, 1),
     force_pull = 1,
     force = 80,
     var_scaling_factor = 1
   )
)
plot_grid(plotlist = p_pca_red, ncol = 2)

The PCA on the variables displayed in fig 1 shows (Fig. ??) that those variables represent independant facets of biodiversity as the five variables are well represented on 5 axis, each axis explaining the same amountof the variation, i.e. 20%.

2.1 PCA with all facets of biodiversity

pca_clust_tot <- compute_rotated_pca(site_no_drivers_inla_tot, naxis = 5)

p_pca_tot <- map(axis_l,
  ~ plot_pca_clust(
     .data = pca_clust_tot$rotated,
     site_cl = site_cl_rm_tot,
     add_point = FALSE,
     add_ellipse = FALSE,
     xaxis = .x[1], yaxis = .x[2],
     ctb_thld = .2,
     replace_var = get_var_replacement(),
     size_arrows_segment = 1,
     label_size = 2.5,
     alpha_point = .2,
     lim_x_y = c(-1, 1),
     force_pull = 1,
     force = 80,
     var_scaling_factor = 1
   )
)
plot_grid(plotlist = p_pca_tot, ncol = 2)

Those PCA containing all biodiversity facets shows (Fig. ??) relatively similar results than the one in the main text (i.e. without exotic species variables). We see the same independant partitions: temporal trends of species richness, dissimilarity, percentage of exotic species and abundance, Nestedness and of total abundance are respectively related to axis 1, 2, 3, 4 and 5.

2.2 PCA presented in the main text


pca_clust <- compute_rotated_pca(site_no_drivers_inla, naxis = 4)

axis_l4 <- axis_l[!map_lgl(axis_l, ~any(str_detect(.x, "RC5")))]
axis_l3 <- axis_l[!map_lgl(axis_l, ~any(str_detect(.x, "RC5|RC4")))]
p_pca <- map(axis_l4,
  ~ plot_pca_clust(
     .data = pca_clust$rotated,
     site_cl = site_cl_rm_tot,
     add_point = FALSE,
     add_ellipse = FALSE,
     xaxis = .x[1], yaxis = .x[2],
     ctb_thld = .2,
     replace_var = get_var_replacement(),
     size_arrows_segment = 1,
     label_size = 2.5,
     alpha_point = .2,
     lim_x_y = c(-1, 1),
     force_pull = 1,
     force = 80,
     var_scaling_factor = 1
   )
)
plot_grid(plotlist = p_pca, ncol = 2)

Comparing the PCA without (main text and here, Fig. ??) and the PCA containing the exotic species variables (Fig. ??), we see that they contain the same independant axis, except obviously the one related of exoctic species.

In the PCA figure of the main text, I then suggest that we keep all the variables, i.e. including the percentage of exotic species and the percentage of exotic species abundance. Concerning the clusters, the story is a bit different.

  • So the PCA figure, without the clusters could look like this:
plot_grid(plotlist = p_pca_tot[c(1, 5, 6, 7)], ncol = 2
)

3 Clusters

The clustering failed with the inclusion of percentage of exotic species and abundance. It resulted in a cluster partition only based on exotic fishes temporal trends. One explanation is that kmeans is known for giving more weight to variable with less variation (ref1). Moreover, it does not make a lot of sense to include variables that mostly have no trends and little variation in a cluster analysis.

target_bp_cl_dist(cl_obj = site_cl_rm_red) +
  theme(axis.text.x = element_text(angle = 20, vjust = 0))

  • The same problematic behavior of cluster applies if we include all the facets of biodiversity (meaning jaccard in this decomposition), exotic species trends are still found the be the most discriminant variable although their relative weak variability:
target_bp_cl_dist(cl_obj = site_cl_rm_tot) +
  theme(axis.text.x = element_text(angle = 20, vjust = 0))

  • Cluster of the main text:
target_bp_cl_dist(cl_obj = site_cl_rm) +
  theme(axis.text.x = element_text(angle = 20, vjust = 0))

  • If we add exotic species variable:
add_to_boxplot <- site_no_drivers_inla_tot %>%
  select(perc_exo_sp, perc_exo_abun) %>%
  mutate(across(everything(), ~scale(., center = FALSE)[,1])) %>%
  rownames_to_column("siteid") %>%
  as_tibble()
target_bp_cl_dist(
  cl_obj = left_join(site_cl_rm, add_to_boxplot,
    by = "siteid"
    )
) +
  theme(axis.text.x = element_text(angle = 20, vjust = 0))

So, outcome of PCA and cluster tries conclude that we should stay with all biodiversity facets without exotic species.

3.1 Choice of clusters

plot(clust_curv_site)

plot(clust_curv_site_fac_50)

k4_fac_50_cl <- tclust(
  x = site_no_drivers_inla_tot[, clust_var] %>%
  mutate(across(everything(), ~scale(., center = FALSE)[,1])),
  iter.max = 150,
  k = 4,
  alpha = 0.05,
  restr.fact = 50,
  warnings = 2
)
  • Clusters defined by richness change (cl 4), turnover (cl 2), disappearance / richness change (cl 3):
site_k4_fac_50_cl <- get_cluster_df(
     tclust_obj = k4_fac_50_cl,
     site_env = site_env,
     assign_threshold = .5,
     clean_method = "rm"
     )
target_bp_cl_dist(cl_obj = site_k4_fac_50_cl) +
  theme(axis.text.x = element_text(angle = 20, vjust = 0))

k4_fac_1_cl <- tclust(
  x = site_no_drivers_inla_tot[, clust_var] %>%
  mutate(across(everything(), ~scale(., center = FALSE)[,1])),
  iter.max = 150,
  k = 4,
  alpha = 0.05,
  restr.fact = 1,
  warnings = 2
)
plot(k4_fac_1_cl)

site_k4_fac_1_cl <- get_cluster_df(
     tclust_obj = k4_fac_1_cl,
     site_env = site_env,
     assign_threshold = .5,
     clean_method = "rm"
     )
target_bp_cl_dist(cl_obj = site_k4_fac_1_cl) +
  theme(axis.text.x = element_text(angle = 20, vjust = 0))

3.1.0.1 5 clusters

k5_fac_1_cl <- tclust(
  x = site_no_drivers_inla_tot[, clust_var] %>%
  mutate(across(everything(), ~scale(., center = FALSE)[,1])),
  iter.max = 150,
  k = 5,
  alpha = 0.05,
  restr.fact = 1,
  warnings = 2
)
plot(k5_fac_1_cl)

  • With 5 clusters, we see a bit a better but still without abundances
site_k5_fac_1_cl <- get_cluster_df(
     tclust_obj = k5_fac_1_cl,
     site_env = site_env,
     assign_threshold = .5,
     clean_method = "rm"
     )
target_bp_cl_dist(cl_obj = site_k5_fac_1_cl) +
  theme(axis.text.x = element_text(angle = 20, vjust = 0))

k5_fac_50_cl <- tclust(
  x = site_no_drivers_inla_tot[, clust_var] %>%
  mutate(across(everything(), ~scale(., center = FALSE)[, 1])),
  iter.max = 150,
  k = 5,
  alpha = 0.05,
  restr.fact = 50,
  warnings = 2
)
plot(k5_fac_1_cl)

3.1.1 6 clusters

k6_fac_50_cl <- tclust(
  x = site_no_drivers_inla_tot[, clust_var] %>%
  mutate(across(everything(), ~scale(., center = FALSE)[, 1])),
  iter.max = 150,
  k = 6,
  alpha = 0.05,
  restr.fact = 50,
  warnings = 2
)
plot(k6_fac_50_cl)

  • With 6 clusters, we see a bit a better but still without abundances
site_k6_fac_50_cl <- get_cluster_df(
     tclust_obj = k6_fac_50_cl,
     site_env = site_env,
     assign_threshold = .5,
     clean_method = "rm"
     )
target_bp_cl_dist(cl_obj = site_k6_fac_50_cl) +
  theme(axis.text.x = element_text(angle = 20, vjust = 0))

3.1.1.0.1 More restricted cluster
k6_fac_1_cl <- tclust(
  x = site_no_drivers_inla_tot[, clust_var] %>%
  mutate(across(everything(), ~scale(., center = FALSE)[,1])),
  iter.max = 150,
  k = 6,
  alpha = 0.05,
  restr.fact = 1,
  warnings = 2
)
plot(k6_fac_1_cl)

  • With 6 clusters, we have the no change cluster with is really interestingly
site_k6_fac_1_cl <- get_cluster_df(
     tclust_obj = k6_fac_1_cl,
     site_env = site_env,
     assign_threshold = .5,
     clean_method = "rm"
     )
target_bp_cl_dist(cl_obj = site_k6_fac_1_cl) +
  theme(axis.text.x = element_text(angle = 20, vjust = 0))

3.1.1.1 7 clusters

The added cluster is a super increase in abundance

k7_fac_1_cl <- tclust(
  x = site_no_drivers_inla_tot[, clust_var] %>%
  mutate(across(everything(), ~scale(., center = FALSE)[,1])),
  iter.max = 150,
  k = 7,
  alpha = 0.05,
  restr.fact = 1,
  warnings = 2
)
plot(k7_fac_1_cl)

site_k7_fac_1_cl <- get_cluster_df(
     tclust_obj = k7_fac_1_cl,
     site_env = site_env,
     assign_threshold = .5,
     clean_method = "rm"
     )
target_bp_cl_dist(cl_obj = site_k7_fac_1_cl) +
  theme(axis.text.x = element_text(angle = 20, vjust = 0))

3.1.1.2 8 clusters

The added cluster is one with both increase in species richness and turnover

k8_fac_1_cl <- tclust(
  x = site_no_drivers_inla_tot[, clust_var] %>%
  mutate(across(everything(), ~scale(., center = FALSE)[,1])),
  iter.max = 150,
  k = 8,
  alpha = 0.05,
  restr.fact = 1,
  warnings = 2
)
plot(k8_fac_1_cl)

site_k8_fac_1_cl <- get_cluster_df(
     tclust_obj = k8_fac_1_cl,
     site_env = site_env,
     assign_threshold = .5,
     clean_method = "rm"
     )
target_bp_cl_dist(cl_obj = site_k8_fac_1_cl) +
  theme(axis.text.x = element_text(angle = 20, vjust = 0))

3.2 Cluster number check

  • With our analysis, we could have go up to 8 clusters:
plot(clust_curv_site)

plot(clust_curv_site_fac_50)

  • Clusters with exotic variables:
plot(clust_curv_site_tot_fac_50)

plot(clust_curv_site_tot_fac_1)

  • With only independant variables:
plot(clust_curv_site_red_fac_1)

plot(clust_curv_site_red_fac_50)

3.3 Conclusion

  • The goal of the PCA is to show the dimensionality of biodiversity facets:
    • We show Five over 10 variables (i.e. same number of dimension than of number of variables than in the figure one!!!)
  • Cluster analysis allows to display the heterogeneity of temporal trends locally and a relative homogeneity globally
p_pca_cl <- map(axis_l,
  ~ plot_pca_clust(
     .data = pca_clust_tot$rotated,
     site_cl = site_k6_fac_1_cl,
     add_point = TRUE,
     add_ellipse = FALSE,
     xaxis = .x[1], yaxis = .x[2],
     ctb_thld = .2,
     replace_var = get_var_replacement(),
     size_arrows_segment = 1,
     label_size = 2.5,
     alpha_point = .2,
     lim_x_y = c(-3.5, 3.5),
     force_pull = 1,
     force = 80,
     var_scaling_factor = 3.5
   )
)
plot_grid(plotlist = p_pca_cl, ncol = 2)
#> Warning: Removed 53 rows containing missing values (geom_point).
#> Warning: Removed 1 rows containing missing values (geom_point).
#> Removed 1 rows containing missing values (geom_point).
#> Warning: Removed 53 rows containing missing values (geom_point).
#> Warning: Removed 1 rows containing missing values (geom_point).
#> Removed 1 rows containing missing values (geom_point).
#> Warning: Removed 54 rows containing missing values (geom_point).
#> Removed 54 rows containing missing values (geom_point).
#> Warning: Removed 2 rows containing missing values (geom_point).

  • Paper figure:
plot_grid(plotlist = p_pca_cl[c(1, 5, 6, 7)], ncol = 2
)
#> Warning: Removed 53 rows containing missing values (geom_point).
#> Warning: Removed 1 rows containing missing values (geom_point).
#> Removed 1 rows containing missing values (geom_point).

p_pca_cl_ell <- map(axis_l,
  ~ plot_pca_clust(
     .data = pca_clust_tot$rotated,
     site_cl = site_k6_fac_1_cl,
     add_point = TRUE,
     add_ellipse = TRUE,
     xaxis = .x[1], yaxis = .x[2],
     ctb_thld = .2,
     replace_var = get_var_replacement(),
     size_arrows_segment = 1,
     label_size = 2.5,
     alpha_point = .2,
     lim_x_y = c(-3.5, 3.5),
     force_pull = 1,
     force = 80,
     var_scaling_factor = 3.5
   )
)
plot_grid(
  plotlist = p_pca_cl_ell[c(1, 5, 6, 7)], ncol = 2
)
#> Warning: Removed 53 rows containing non-finite values (stat_ellipse).
#> Warning: Removed 53 rows containing missing values (geom_point).
#> Warning: Removed 1 rows containing non-finite values (stat_ellipse).
#> Warning: Removed 1 rows containing missing values (geom_point).
#> Warning: Removed 1 rows containing non-finite values (stat_ellipse).
#> Warning: Removed 1 rows containing missing values (geom_point).

3.4 Analysis

3.5 Reproducibility

Reproducibility receipt

## datetime
Sys.time()
#> [1] "2022-04-27 23:33:04 CDT"

## repository
if(requireNamespace('git2r', quietly = TRUE)) {
  git2r::repository()
} else {
  c(
    system2("git", args = c("log", "--name-status", "-1"), stdout = TRUE),
    system2("git", args = c("remote", "-v"), stdout = TRUE)
  )
}
#>  [1] "commit 2dec419f9a4959f7315dc81cfe2477bd9c45985b"                            
#>  [2] "Author: Alain Danet <alain.danet@mnhn.fr>"                                  
#>  [3] "Date:   Tue Apr 26 17:55:26 2022 -0500"                                     
#>  [4] ""                                                                           
#>  [5] "    Add PCA plot and scaling factor option"                                 
#>  [6] ""                                                                           
#>  [7] "M\tR/plot_paper.R"                                                          
#>  [8] "M\t_targets.R"                                                              
#>  [9] "origin\tgit@github.com:alaindanet/RivFishTimeBiodiversityFacets.git (fetch)"
#> [10] "origin\tgit@github.com:alaindanet/RivFishTimeBiodiversityFacets.git (push)"

## session info
sessionInfo()
#> R version 4.0.5 (2021-03-31)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Debian GNU/Linux 11 (bullseye)
#> 
#> Matrix products: default
#> BLAS:   /home/alain/.Renv/versions/4.0.5/lib/R/lib/libRblas.so
#> LAPACK: /home/alain/.Renv/versions/4.0.5/lib/R/lib/libRlapack.so
#> 
#> locale:
#>  [1] LC_CTYPE=fr_FR.UTF-8       LC_NUMERIC=C              
#>  [3] LC_TIME=fr_FR.UTF-8        LC_COLLATE=fr_FR.UTF-8    
#>  [5] LC_MONETARY=fr_FR.UTF-8    LC_MESSAGES=fr_FR.UTF-8   
#>  [7] LC_PAPER=fr_FR.UTF-8       LC_NAME=C                 
#>  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
#> [11] LC_MEASUREMENT=fr_FR.UTF-8 LC_IDENTIFICATION=C       
#> 
#> attached base packages:
#> [1] parallel  stats     graphics  grDevices utils     datasets  methods  
#> [8] base     
#> 
#> other attached packages:
#>  [1] tclust_1.4-2            ggeffects_1.0.1        
#>  [3] report_0.5.0.9000       see_0.6.8.1            
#>  [5] correlation_0.8.0       modelbased_0.7.1.1     
#>  [7] effectsize_0.6.0.2      parameters_0.16.0      
#>  [9] performance_0.8.0       bayestestR_0.11.5      
#> [11] datawizard_0.2.3        insight_0.15.0         
#> [13] easystats_0.4.3         glmmTMB_1.1.2.9000     
#> [15] inlatools_0.0.1.9001    INLA_21.11.22          
#> [17] sp_1.4-6                foreach_1.5.1          
#> [19] Matrix_1.3-2            future_1.24.0          
#> [21] slider_0.2.2            vegan_2.5-7            
#> [23] lattice_0.20-41         permute_0.9-5          
#> [25] codyn_2.0.5             janitor_2.1.0          
#> [27] viridis_0.6.2           viridisLite_0.4.0      
#> [29] cowplot_1.1.1           terra_1.5-21           
#> [31] rnaturalearthdata_0.1.0 rnaturalearth_0.1.0    
#> [33] sf_1.0-6                scales_1.1.1           
#> [35] kableExtra_1.3.4.9000   here_1.0.1             
#> [37] lubridate_1.8.0         magrittr_2.0.2         
#> [39] forcats_0.5.1           stringr_1.4.0          
#> [41] dplyr_1.0.8             purrr_0.3.4            
#> [43] readr_2.1.1             tidyr_1.2.0            
#> [45] tibble_3.1.6            ggplot2_3.3.5          
#> [47] tidyverse_1.3.1         tarchetypes_0.3.2      
#> [49] rmarkdown_2.11          nvimcom_0.9-124        
#> [51] targets_0.8.1           conflicted_1.1.0       
#> 
#> loaded via a namespace (and not attached):
#>   [1] readxl_1.3.1        backports_1.2.1     systemfonts_1.0.0  
#>   [4] igraph_1.3.0        TMB_1.7.20          splines_4.0.5      
#>   [7] listenv_0.8.0       TH.data_1.0-10      digest_0.6.27      
#>  [10] htmltools_0.5.2     fansi_0.5.0         memoise_2.0.0      
#>  [13] cluster_2.1.1       tzdb_0.2.0          globals_0.14.0     
#>  [16] modelr_0.1.8        sandwich_3.0-0      svglite_2.0.0      
#>  [19] prettyunits_1.1.1   colorspace_2.0-0    ggrepel_0.9.1      
#>  [22] rvest_1.0.2         warp_0.2.0          haven_2.5.0        
#>  [25] xfun_0.30           callr_3.7.0         crayon_1.4.2       
#>  [28] jsonlite_1.7.2      lme4_1.1-26         survival_3.2-10    
#>  [31] zoo_1.8-8           iterators_1.0.13    glue_1.6.2         
#>  [34] gtable_0.3.0        emmeans_1.7.2       webshot_0.5.2      
#>  [37] mvtnorm_1.1-1       DBI_1.1.1           Rcpp_1.0.6         
#>  [40] progress_1.2.2      xtable_1.8-4        tmvnsim_1.0-2      
#>  [43] units_0.8-0         httr_1.4.2          ellipsis_0.3.2     
#>  [46] farver_2.0.3        pkgconfig_2.0.3     sass_0.4.0         
#>  [49] dbplyr_2.1.1        utf8_1.2.2          labeling_0.4.2     
#>  [52] tidyselect_1.1.1    rlang_1.0.2         munsell_0.5.0      
#>  [55] cellranger_1.1.0    tools_4.0.5         cachem_1.0.4       
#>  [58] cli_3.2.0           generics_0.1.0      ade4_1.7-16        
#>  [61] sjlabelled_1.1.7    broom_0.7.12        evaluate_0.15      
#>  [64] fastmap_1.1.0       yaml_2.2.1          processx_3.5.2     
#>  [67] knitr_1.38          fs_1.5.2            nlme_3.1-152       
#>  [70] xml2_1.3.2          compiler_4.0.5      rstudioapi_0.13    
#>  [73] e1071_1.7-4         reprex_2.0.1        statmod_1.4.35     
#>  [76] bslib_0.2.4         stringi_1.7.6       highr_0.9          
#>  [79] ps_1.6.0            psych_2.0.12        classInt_0.4-3     
#>  [82] nloptr_1.2.2.2      vctrs_0.3.8         pillar_1.6.4       
#>  [85] lifecycle_1.0.1     jquerylib_0.1.3     estimability_1.3   
#>  [88] data.table_1.13.6   R6_2.5.1            bookdown_0.24      
#>  [91] KernSmooth_2.23-18  gridExtra_2.3       parallelly_1.30.0  
#>  [94] codetools_0.2-18    boot_1.3-27         MASS_7.3-53.1      
#>  [97] assertthat_0.2.1    rprojroot_2.0.2     withr_2.4.3        
#> [100] mnormt_2.0.2        multcomp_1.4-16     mgcv_1.8-34        
#> [103] hms_1.1.1           grid_4.0.5          coda_0.19-4        
#> [106] class_7.3-18        minqa_1.2.4         snakecase_0.11.0   
#> [109] numDeriv_2016.8-1.1